#!/bin/bash

# --- Default settings (may be overridden by command line options) ---
debug=0    # 1 = keep stderr diagnostics visible (-d)
me="${0##*/}"    # script name without leading path, used in the usage text
useragent="Mozilla/5.0 (Windows NT 10.0; rv:124.0) Gecko/20100101 Firefox/124.0"
file=""    # local file to parse instead of querying the web (-F)
urlbase="https://www.geonames.org/"
# External programs this script invokes; perl is used to decode HTML entities.
deps=( curl xmllint perl )
max=10     # maximum number of results (-m)
only=0     # 1 = print only the numerical id (-o)
# Initialise explicitly so an exported "fuzzy" from the caller's environment
# cannot switch fuzzy search on by accident.
fuzzy=0    # 1 = fuzzy search (-f)
nonl=0 # omit final newline if set to 1
sep='\n'   # separator printed between results (-s)

# Print an optional message followed by the help text, then terminate.
# Arguments: $* - optional message shown before the help text
# Globals:   urlbase, deps, me, max, useragent, sep (read, shown in the text)
# Outputs:   message and help text on stdout
# Exits:     always with status 1
usage() {
[[ -n "$*" ]] && echo "$*"
cat <<EOF

Enter your query to get a location ID required to retrieve weather data.
Queries $urlbase

Dependencies: ${deps[@]}

Usage: $me [options] [free form location string]

Options:

-m int  Maximum number of results (default $max)

-U str  Override user agent. Default:
        $useragent

-o      only output numerical id. Combine with "-m1" for usage in scripts.

-d      Debugging output

-h      Show this help

-f      Fuzzy search on

-F str  Parse local file. Invalidates -U option and query.
        Mostly for debugging.

-N      Omit final newline

-s str  Result separator. Default: $sep

EOF
exit 1
}
# Evaluate an XPath expression against an HTML document.
# Arguments: $1 - XPath expression
#            $2 - HTML document text
# Outputs:   xmllint's result on stdout, with HTML entities decoded by
#            perl's HTML::Entities
xpath() {
	local expression="$1" document="$2"
	# printf instead of echo: echo would treat a document consisting of
	# "-n" or "-e" as an option and print nothing.
	printf '%s\n' "$document" \
		| xmllint --html --noblanks --nowrap --recover --nonet --xpath "$expression" - 2>/dev/null \
		| perl -CS -MHTML::Entities -pe 'decode_entities($_);'
	# xmllint creates loads of error messages even when it's working correctly
	# if you really want to see these messages, remove '2>/dev/null'
}

# simple dependency check: every program listed in deps must be resolvable
for dep in "${deps[@]}"; do
	# Name the missing tool in the message instead of showing the bare usage text.
	type -f "$dep" >/dev/null 2>&1 || usage "Missing dependency: $dep"
done
# The HTML::Entities perl module must be loadable; perl reports the reason
# itself if it is not.
perl -MHTML::Entities -e 1 || exit 1

# Parse command line options into the global settings.
# Arguments: the script's command line ("$@")
# Globals:   max, file, useragent, fuzzy, debug, only, nonl, sep (written)
#            OPTIND (left pointing at the first non-option argument)
# Exits:     via usage() on -h, an unknown option or an invalid option value
parse_options() {
  local opt
  OPTIND=1
  while getopts "fF:U:hdm:os:N" opt; do
    case $opt in
      m) # The regex must be anchored: an unanchored match would accept
         # strings such as "1+1" or "x[\$(cmd)]1", which the arithmetic
         # evaluation below would then execute. 10# forces base 10 so a
         # value like "08" is not rejected as invalid octal.
         if [[ "$OPTARG" =~ ^[0-9]+$ ]] && (( 10#$OPTARG > 0 )); then
           max=$(( 10#$OPTARG ))
         else
           usage "Option -${opt}: invalid number $OPTARG"
         fi
      ;;
      F) if [[ -r "$OPTARG" ]]; then
           file="$OPTARG"
         else
           usage "cannot read $OPTARG"
         fi
      ;;
      U) useragent="$OPTARG"
      ;;
      f) fuzzy=1
      ;;
      d) debug=1
      ;;
      o) only=1
      ;;
      N) nonl=1
      ;;
      s) sep="$OPTARG"
      ;;
      # -h and anything getopts rejects end up here
      *) usage
      ;;
    esac
  done
}
parse_options "$@"
shift $((OPTIND-1))

# Remaining arguments form the free form search string.
query="$*"

# Unless debugging was requested, discard everything written to stderr from
# here on (the diagnostics below as well as curl's own stderr output).
[[ "$debug" == 0 ]] && exec 2>/dev/null
echo "useragent: $useragent" >&2
echo "debug: $debug" >&2

# Strip a trailing "/" from urlbase before appending the path, so the request
# URL does not contain "//search.html".
if [[ "$fuzzy" == 1 ]]; then
    urlbase="${urlbase%/}/search.html?fuzzy=0.6"
else
    urlbase="${urlbase%/}/search.html"
fi

if [[ "$file" == "" ]]; then
    # cannot use read -p "..." because the prompt is output to stderr.
    if [[ "$query" == "" ]]; then
        printf '%s' "Please enter a free form search for your location: "
        read -r query
    fi
    # Stop with a non-zero status when the request itself fails instead of
    # continuing with an empty document and reporting success.
    html="$(curl --data-urlencode "q=$query" --user-agent "$useragent" "$urlbase")" || {
        echo "$me: request to $urlbase failed" >&2
        exit 1
    }
else
    # -F given: parse the local file, no network access
    html="$(<"$file")"
fi
echo "location: $query" >&2

# Print the search results found in $html.
# Globals:   html, max, only, sep, nonl (read)
# Calls:     xpath
# Outputs:   one entry per result row on stdout; entries are joined by $sep
#            (backslash escapes in $sep are interpreted). With only=1 an
#            entry is just the id, otherwise the id followed by the row's
#            text fields, each followed by a space. A final newline is
#            written unless nonl is 1.
# Returns:   0
print_results() {
    local j i id row expr printed=0
    local -a table
    # Table rows tr[3] .. tr[max+2] are examined; presumably the first two
    # rows of the "restable" table are not results - TODO confirm.
    for (( j = 3; j < max + 3; j++ )); do
        row="//table[@class=\"restable\"]//tr[$j]"
        # without lat/lon info; to include it, also select
        # $row/td[2]/span[@class="geo"]/span/text()
        expr="$row/td[1]/a/@href | $row/td[2]/a/text() | $row/td[3]/text() | $row/td[3]/small/text()"
        mapfile -t table <<<"$(xpath "$expr" "$html")"
        # First line is the link; keep what lies between its first and last
        # "/" (assumes a link of the form /<id>/<name>).
        id="${table[0]}"
        id="${id#*/}"
        id="${id%/*}"
        # rows without a link are skipped
        [[ -z "$id" ]] && continue
        # Separate from the previously *printed* entry, so a skipped row
        # cannot produce a leading separator. %b keeps escapes such as \n
        # working while a "%" in the separator stays literal.
        (( printed )) && printf '%b' "$sep"
        printed=1
        if [[ "$only" == 1 ]]; then
            # id only: no padding and no extra newline, so that the
            # separator and nonl settings fully control the output
            printf '%s' "$id"
            continue
        fi
        printf '%s ' "$id"
        for (( i = 1; i < ${#table[@]}; i++ )); do
            # drop the ", " some fields start with
            printf '%s ' "${table[i]#, }"
        done
    done
    # An if statement (rather than "&&") keeps the exit status 0 when the
    # final newline is suppressed.
    if (( nonl == 0 )); then
        echo
    fi
}
print_results
